# -*- coding: utf-8 -*-
import json

import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


class ImageItem(scrapy.Item):
    """Item holding scraped image results for one search keyword.

    NOTE(review): only declared here — no code in this file populates it,
    presumably a parse callback elsewhere does; confirm against callers.
    """
    # Running count of images collected so far.
    count = scrapy.Field()
    # The search keyword these images belong to.
    keyword = scrapy.Field()
    # The collected images (URLs or payloads — can't tell from here).
    images = scrapy.Field()


class HdjwSpider(scrapy.Spider):
    """Spider for the HNU educational-administration site ("hdjw").

    Reuses a previously captured session cookie to fetch the
    authenticated index page and dumps the raw HTML to ``hdjw.html``
    for offline inspection.
    """

    name = "hdjw"
    # allowed_domains = ["baidu.com", 'ss3.bdstatic.com']
    start_urls = ['http://hdjw.hnu.edu.cn/Njw2017/login.html']
    index_url = "http://hdjw.hnu.edu.cn/Njw2017/index.html"

    # NOTE(review): hard-coded session id + JWT; these expire. Replace with
    # a real login flow or pass fresh cookie material via spider arguments.
    raw_cookie = "SESSION=8c33cb7d-35ab-48d1-8bc2-9099b12fcfe6; token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJhY2MiOiIyMDE1MDgwMTA3MjAiLCJleHAiOjE1NTY3NjUxNjUsInNpZCI6IjhjMzNjYjdkLTM1YWItNDhkMS04YmMyLTkwOTliMTJmY2ZlNiJ9.N-Cw3Sg_Es0j2oxzgR_r9bsfOc1ZPmLFpk77wJ-d-ik"

    def __init__(self, keyword=None, num=1000, **kwargs):
        """
        :param keyword: search keyword to crawl for (may be ``None``).
        :param num: maximum number of items to fetch.  Coerced to ``int``
            because Scrapy passes ``-a num=...`` CLI arguments as strings.
        """
        super().__init__(**kwargs)
        self.keyword = keyword
        self.num = int(num)  # BUGFIX: CLI spider args arrive as str
        self.count = 0

    def start_requests(self):
        # BUGFIX: the original sent a header literally named "cookies",
        # which is not a valid HTTP header name — the server ignores it
        # and the session is never resumed.  Parse the captured cookie
        # string into a dict and hand it to Scrapy's cookie middleware,
        # which emits a proper "Cookie" header.
        cookies = dict(
            pair.strip().split("=", 1)
            for pair in self.raw_cookie.split(";")
            if "=" in pair
        )
        yield scrapy.Request(url=self.index_url, cookies=cookies,
                             callback=self.parse)

    def parse(self, response):
        """Save the fetched index page to ``hdjw.html`` for inspection."""
        with open('hdjw.html', 'w', encoding='utf-8') as f:
            f.write(response.text)


if __name__ == "__main__":
    # Run the spider in-process.  Passing the class itself (instead of the
    # "hdjw" name string) avoids a KeyError when this module is not listed
    # in the project's SPIDER_MODULES setting.
    process = CrawlerProcess(get_project_settings())
    process.crawl(HdjwSpider)
    process.start()  # blocks until the crawl finishes
